import_data("jake_gyllenhaal") 
Loading required package: rvest
Loading required package: xml2

Attaching package: ‘rvest’

The following object is masked from ‘package:purrr’:

    pluck

The following object is masked from ‘package:readr’:

    guess_encoding

NAs introduced by coercion
filmes = read_imported_data()
filmes %>% 
    glimpse()
Observations: 20
Variables: 5
$ avaliacao  <int> 92, 68, 73, 52, 73, 59, 82, 85, 92, 49, 35, 64, 47, 90, 87, 61, 62, 44, ...
$ filme      <chr> "Stronger", "Life", "Nocturnal Animals", "Demolition", "Everest", "South...
$ papel      <chr> "Jeff Bauman", "David Jordan", "Tony HastingsEdward Sheffield", "Davis M...
$ bilheteria <dbl> 4.2, 30.2, 10.7, 1.7, 46.6, 42.4, 61.0, 39.1, 54.7, 33.3, 90.8, 28.6, 9....
$ ano        <int> 2017, 2017, 2016, 2016, 2015, 2015, 2013, 2012, 2011, 2010, 2010, 2009, ...

Data Overview

Bilheteria

filmes %>% 
    ggplot(aes(x = ano, y = bilheteria)) + 
    geom_point(size = 4, color = paleta[1]) 

filmes %>% 
    ggplot(aes(x = bilheteria)) + 
    geom_histogram(binwidth = 10, boundary = 0, 
                   fill = "grey", color = "black") + 
    geom_rug(size = .5) +
    scale_x_continuous(breaks=seq(0,210,10))

filmes %>% 
    group_by(filme) %>%
    ggplot(aes(sample=bilheteria)) + 
        stat_qq()

p = filmes %>% 
    ggplot(aes(x = "", y = bilheteria, label = filme)) + 
    geom_jitter(width = .05, alpha = .3, size = 3) + 
    labs(x = "")
ggplotly(p)

Avaliação

filmes %>% 
    ggplot(aes(x = ano, y = avaliacao)) + 
    geom_point(size = 4, color = paleta[1])  +
    scale_y_continuous(limits = c(0, 100))

filmes %>% 
    ggplot(aes(x = avaliacao)) + 
    geom_histogram(binwidth = 10, boundary = 0, 
                   fill = paleta[3], color = "black") + 
    geom_rug(size = .5) 

filmes %>% 
    group_by(filme) %>%
    ggplot(aes(sample=avaliacao)) + 
    stat_qq() 

p = filmes %>% 
    ggplot(aes(x = "", y = avaliacao, label = filme)) + 
    geom_jitter(width = .05, alpha = .3, size = 3) + 
    labs(x = "")
ggplotly(p)

Agrupamento hierárquico

agrupamento_h = filmes %>% 
    mutate(nome = paste0(filme, " (av=", avaliacao, ")")) %>% 
    as.data.frame() %>% 
    column_to_rownames("filme") %>% 
    select(avaliacao) %>%
    dist(method = "euclidian") %>% 
    hclust(method = "ward.D")
ggdendrogram(agrupamento_h, rotate = T, size = 2, theme_dendro = F) + 
    labs(y = "Dissimilaridade", x = "", title = "Dendrograma")

get_grupos <- function(agrupamento, num_grupos){
    agrupamento %>% 
        cutree(num_grupos) %>% 
        as.data.frame() %>% 
        mutate(label = rownames(.)) %>% 
        gather(key =  "k", value = "grupo", -label) %>% 
        mutate(grupo = as.character(grupo))
}
atribuicoes = get_grupos(agrupamento_h, num_grupos = 1:6)
atribuicoes = atribuicoes %>% 
    left_join(filmes, by = c("label" = "filme"))
atribuicoes %>% 
    ggplot(aes(x = "Filmes", y = avaliacao, colour = grupo)) + 
    geom_jitter(width = .02, height = 0, size = 1.6, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2")

k_escolhido = 3
plot_atrib <-
    atribuicoes %>% 
    filter(k == k_escolhido) %>% 
    ggplot(aes(x = reorder(label, avaliacao), y = avaliacao, colour = grupo)) + 
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2") + 
    labs(x = "", y = "Avaliação RT") + 
    coord_flip()
ggplotly(plot_atrib)

Com duas dimensões

agrupamento_h_2d = filmes %>%
   mutate(bilheteria = log10(bilheteria)) %>%
   mutate_at(vars("avaliacao", "bilheteria"), funs(scale)) %>%
   column_to_rownames("filme") %>%
   select("avaliacao", "bilheteria") %>%
   dist(method = "euclidean") %>%
   hclust(method = "ward.D")
Setting row names on a tibble is deprecated.
ggdendrogram(agrupamento_h_2d, rotate = TRUE, theme_dendro = F)

filmes2 <- filmes %>%
    mutate(bilheteria = log10(bilheteria))
plota_hclusts_2d(agrupamento_h_2d,
                filmes2,
                c("avaliacao", "bilheteria"),
                linkage_method = "ward.D", ks = 1:6) + 
    scale_y_log10() +
    scale_color_brewer(palette = "Dark2")
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace
the existing scale.

atribuicoes = get_grupos(agrupamento_h_2d, num_grupos = 1:6)
atribuicoes = atribuicoes %>% 
    filter(k == 3) %>%
    mutate(filme = label) %>% 
    left_join(filmes, by = "filme")
z <- atribuicoes %>%
    ggplot(aes(x = avaliacao,
               y = bilheteria,
               colour = grupo,
               text = paste(
                    "Filme:", filme,
                    "\nBilheteria:", bilheteria,"m\n",
                    "Avaliação:", avaliacao))) + 
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2") +
    scale_y_log10()
ggplotly(z, tooltip = "text") %>%
    layout(autosize = F)
LS0tCnRpdGxlOiAiVGlwb3MgZGUgZmlsbWUgZGUgSmFrZSBHeWxsZW5oYWFsIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZ2dkZW5kcm8pCgpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9saWIuUiIpKQpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9wbG90YV9zb2x1Y29lc19oY2x1c3QuUiIpKQoKdGhlbWVfc2V0KHRoZW1lX3JlcG9ydCgpKQoKa25pdHI6Om9wdHNfY2h1bmskc2V0KHRpZHkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDYsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0ID0gNSwKICAgICAgICAgICAgICAgICAgICAgIGVjaG8gPSBUUlVFKQoKcGFsZXRhID0gYygiIzQwNEU0RCIsCiAgICAgICAgICAgIiM5MkRDRTUiLAogICAgICAgICAgICIjOTM4QkExIiwKICAgICAgICAgICAiIzJEMzE0MiIsCiAgICAgICAgICAgIiNGNDc0M0IiKQpgYGAKCmBgYHtyIHJlYWR9CmltcG9ydF9kYXRhKCJqYWtlX2d5bGxlbmhhYWwiKSAKZmlsbWVzID0gcmVhZF9pbXBvcnRlZF9kYXRhKCkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdsaW1wc2UoKQpgYGAKCgojIyBEYXRhIE92ZXJ2aWV3CgojIyMgQmlsaGV0ZXJpYQoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBiaWxoZXRlcmlhKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAKYGBgCgoKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYmlsaGV0ZXJpYSkpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgwLDIxMCwxMCkpCmBgYAoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBncm91cF9ieShmaWxtZSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1iaWxoZXRlcmlhKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KcCA9IGZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiIiwgeSA9IGJpbGhldGVyaWEsIGxhYmVsID0gZmlsbWUpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwKQpgYGAKCiMjIyBBdmFsaWHDp8OjbwoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBhdmFsaWFjYW8pKSArIAogICAgZ2VvbV9wb2ludChzaXplID0gNCwgY29sb3IgPSBwYWxldGFbMV0pICArCiAgICBzY2FsZV95X2NvbnRpbnVvdXMobGltaXRzID0gYygwLCAxMDApKQpgYGAKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYXZhbGlhY2FvKSkgKyAKICAgIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMTAsIGJvdW5kYXJ5ID0gMCwgCiAgICAgICAgICAgICAgICAgICBmaWxsID0gcGFsZXRhWzNdLCBjb2xvciA9ICJibGFjayIpICsgCiAgICBnZW9tX3J1ZyhzaXplID0gLjUpIApgYGAKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ3JvdXBfYnkoZmlsbWUpICU+JQogICAgZ2dwbG90KGFlcyhzYW1wbGU9YXZhbGlhY2FvKSkgKyAKICAgIHN0YXRfcXEoKSAKYGBgCgpgYGB7cn0KcCA9IGZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiIiwgeSA9IGF2YWxpYWNhbywgbGFiZWwgPSBmaWxtZSkpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wNSwgYWxwaGEgPSAuMywgc2l6ZSA9IDMpICsgCiAgICBsYWJzKHggPSAiIikKCmdncGxvdGx5KHApCmBgYAoKIyMgQWdydXBhbWVudG8gaGllcsOhcnF1aWNvCgpgYGB7cn0KYWdydXBhbWVudG9faCA9IGZpbG1lcyAlPiUgCiAgICBtdXRhdGUobm9tZSA9IHBhc3RlMChmaWxtZSwgIiAoYXY9IiwgYXZhbGlhY2FvLCAiKSIpKSAlPiUgCiAgICBhcy5kYXRhLmZyYW1lKCkgJT4lIAogICAgY29sdW1uX3RvX3Jvd25hbWVzKCJmaWxtZSIpICU+JSAKICAgIHNlbGVjdChhdmFsaWFjYW8pICU+JQogICAgZGlzdChtZXRob2QgPSAiZXVjbGlkaWFuIikgJT4lIAogICAgaGNsdXN0KG1ldGhvZCA9ICJ3YXJkLkQiKQoKZ2dkZW5kcm9ncmFtKGFncnVwYW1lbnRvX2gsIHJvdGF0ZSA9IFQsIHNpemUgPSAyLCB0aGVtZV9kZW5kcm8gPSBGKSArIAogICAgbGFicyh5ID0gIkRpc3NpbWlsYXJpZGFkZSIsIHggPSAiIiwgdGl0bGUgPSAiRGVuZHJvZ3JhbWEiKQpgYGAKCmBgYHtyfQpnZXRfZ3J1cG9zIDwtIGZ1bmN0aW9uKGFncnVwYW1lbnRvLCBudW1fZ3J1cG9zKXsKICAgIGFncnVwYW1lbnRvICU+JSAKICAgICAgICBjdXRyZWUobnVtX2dydXBvcykgJT4lIAogICAgICAgIGFzLmRhdGEuZnJhbWUoKSAlPiUgCiAgICAgICAgbXV0YXRlKGxhYmVsID0gcm93bmFtZXMoLikpICU+JSAKICAgICAgICBnYXRoZXIoa2V5ID0gICJrIiwgdmFsdWUgPSAiZ3J1cG8iLCAtbGFiZWwpICU+JSAKICAgICAgICBtdXRhdGUoZ3J1cG8gPSBhcy5jaGFyYWN0ZXIoZ3J1cG8pKQp9CgphdHJpYnVpY29lcyA9IGdldF9ncnVwb3MoYWdydXBhbWVudG9faCwgbnVtX2dydXBvcyA9IDE6NikKCmF0cmlidWljb2VzID0gYXRyaWJ1aWNvZXMgJT4lIAogICAgbGVmdF9qb2luKGZpbG1lcywgYnkgPSBjKCJsYWJlbCIgPSAiZmlsbWUiKSkKCmF0cmlidWljb2VzICU+JSAKICAgIGdncGxvdChhZXMoeCA9ICJGaWxtZXMiLCB5ID0gYXZhbGlhY2FvLCBjb2xvdXIgPSBncnVwbykpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wMiwgaGVpZ2h0ID0gMCwgc2l6ZSA9IDEuNiwgYWxwaGEgPSAuNikgKyAKICAgIGZhY2V0X3dyYXAofiBwYXN0ZShrLCAiIGdydXBvcyIpKSArIAogICAgc2NhbGVfY29sb3JfYnJld2VyKHBhbGV0dGUgPSAiRGFyazIiKQoKYGBgCgpgYGB7cn0Ka19lc2NvbGhpZG8gPSAzCgpwbG90X2F0cmliIDwtCiAgICBhdHJpYnVpY29lcyAlPiUgCiAgICBmaWx0ZXIoayA9PSBrX2VzY29saGlkbykgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gcmVvcmRlcihsYWJlbCwgYXZhbGlhY2FvKSwgeSA9IGF2YWxpYWNhbywgY29sb3VyID0gZ3J1cG8pKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAzLCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpICsgCiAgICBsYWJzKHggPSAiIiwgeSA9ICJBdmFsaWHDp8OjbyBSVCIpICsgCiAgICBjb29yZF9mbGlwKCkKCmdncGxvdGx5KHBsb3RfYXRyaWIpCmBgYAoKIyMgQ29tIGR1YXMgZGltZW5zw7VlcwoKYGBge3J9CmFncnVwYW1lbnRvX2hfMmQgPSBmaWxtZXMgJT4lCiAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpICU+JQogICBtdXRhdGVfYXQodmFycygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwgZnVucyhzY2FsZSkpICU+JQogICBjb2x1bW5fdG9fcm93bmFtZXMoImZpbG1lIikgJT4lCiAgIHNlbGVjdCgiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSAlPiUKICAgZGlzdChtZXRob2QgPSAiZXVjbGlkZWFuIikgJT4lCiAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikKCmdnZGVuZHJvZ3JhbShhZ3J1cGFtZW50b19oXzJkLCByb3RhdGUgPSBUUlVFLCB0aGVtZV9kZW5kcm8gPSBGKQpgYGAKCmBgYHtyfQpmaWxtZXMyIDwtIGZpbG1lcyAlPiUKICAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpCgpwbG90YV9oY2x1c3RzXzJkKGFncnVwYW1lbnRvX2hfMmQsCiAgICAgICAgICAgICAgICBmaWxtZXMyLAogICAgICAgICAgICAgICAgYygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwKICAgICAgICAgICAgICAgIGxpbmthZ2VfbWV0aG9kID0gIndhcmQuRCIsIGtzID0gMTo2KSArIAogICAgc2NhbGVfeV9sb2cxMCgpICsKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikKYGBgCgpgYGB7cn0KYXRyaWJ1aWNvZXMgPSBnZXRfZ3J1cG9zKGFncnVwYW1lbnRvX2hfMmQsIG51bV9ncnVwb3MgPSAxOjYpCgphdHJpYnVpY29lcyA9IGF0cmlidWljb2VzICU+JSAKICAgIGZpbHRlcihrID09IDMpICU+JQogICAgbXV0YXRlKGZpbG1lID0gbGFiZWwpICU+JSAKICAgIGxlZnRfam9pbihmaWxtZXMsIGJ5ID0gImZpbG1lIikKCnogPC0gYXRyaWJ1aWNvZXMgJT4lCiAgICBnZ3Bsb3QoYWVzKHggPSBhdmFsaWFjYW8sCiAgICAgICAgICAgICAgIHkgPSBiaWxoZXRlcmlhLAogICAgICAgICAgICAgICBjb2xvdXIgPSBncnVwbywKICAgICAgICAgICAgICAgdGV4dCA9IHBhc3RlKAogICAgICAgICAgICAgICAgICAgICJGaWxtZToiLCBmaWxtZSwKICAgICAgICAgICAgICAgICAgICAiXG5CaWxoZXRlcmlhOiIsIGJpbGhldGVyaWEsIm1cbiIsCiAgICAgICAgICAgICAgICAgICAgIkF2YWxpYcOnw6NvOiIsIGF2YWxpYWNhbykpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAzLCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpICsKICAgIHNjYWxlX3lfbG9nMTAoKQoKZ2dwbG90bHkoeiwgdG9vbHRpcCA9ICJ0ZXh0IikgJT4lCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQpgYGAK